In [1]:
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# from numpy.random import random, shuffle
%matplotlib inline

In [2]:
data_points = np.array([[0.1,0.3], [0.2, 0.6], [0.5,0.5], [0.7,0.4], [0.9,0.8]])
X = data_points[:, 0].reshape((1,5))
Y = data_points[:, 1].reshape((1,5))
learning_rate = 0.5
momentum = 0.9
'''
w1 = np.array([[0.5]])
w2 = np.ones((20,1))*0.5
w3 = np.ones((1,20))*0.5
'''
w1 = (np.random.random((1,1))-0.5)
w2 = (np.random.random((20,1))-0.5)
w3 = (np.random.random((1,20))-0.5)
b1 = np.array([[0.5]])
b2 = np.ones((20,1))*0.5
b3 = np.array([[0.5]])
W = [w1,w2,w3]
B = [b1,b2,b3]
A = dict()
Z = dict()
g_W_prev = [np.zeros((1,1)),np.zeros((20,1)), np.zeros((1,20))]
g_B_prev = [np.zeros((1,1)),np.zeros((20,1)), np.zeros((1,1))]

In [3]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def d_sigmoid(x):
    f = sigmoid(x)
    df = f * (1 - f)
    return df

In [4]:
def forward_prop(X, W, B):
    '''
    All output function the same: sigmoid
    '''
    input_vec = X
    for idx in range(len(W)):
        Z[idx] = W[idx].dot(input_vec) + B[idx]
        A[idx] = sigmoid(Z[idx])
        input_vec = A[idx]
    return Z, A

In [5]:
def back_prop(Y, W, B, X, A, Z):
    N = A[2].shape[1]

    delta3 = - (Y - A[2])
    delta2 = (W[2].T.dot(delta3))*d_sigmoid(Z[1])
    delta1 = (W[1].T.dot(delta2))*d_sigmoid(Z[0])
    
    g_w3 = delta3.dot(A[1].T)/ N
    g_w2 = delta2.dot(A[0].T)/ N
    g_w1 = delta1.dot(X.T)/ N

    g_b1 = np.mean(delta1, axis=1, keepdims=True)
    g_b2 = np.mean(delta2, axis=1, keepdims=True)
    g_b3 = np.mean(delta3, axis=1, keepdims=True)
    
    return [g_w1,g_w2,g_w3], [g_b1,g_b2,g_b3]

In [6]:
def update(g_W, g_B):
    for idx, paras in enumerate(zip(W, g_W)):
        W[idx] -= paras[1] * learning_rate + g_W_prev[idx]
    for idx, paras in enumerate(zip(B, g_B)):
        B[idx] -= paras[1] * learning_rate + g_B_prev[idx]

In [7]:
def error():
    _, A = forward_prop(X, W, B)
    Y_hat = A[2]
    loss = np.sum((Y_hat-Y) **2)/2
    return loss

In [8]:
def iterate():
    Z, A = forward_prop(X, W, B)
    g_W, g_B = back_prop(Y, W, B, X, A, Z)
    update(g_W, g_B)

In [9]:
def test(test_x):
    _ , A = forward_prop(np.array([test_x]), W, B)
    return A[2]

In [10]:
def get_figure():
    plt.scatter(X, Y)
    axes = plt.gca()
    axes.set_xlim([0,1])
    axes.set_ylim([0,1])
    test_x = np.linspace(0, 1, 1000)
    test_y = test(test_x)
    test_y = test_y.reshape(1000)
    plt.plot(test_x, test_y)

Results

Initial error / figure


In [11]:
error()


Out[11]:
0.077819079597374233

In [12]:
get_figure()


After 30000 Iterations


In [13]:
for _ in range(30000):
    iterate()

In [14]:
error()


Out[14]:
0.027781669250707654

In [150]:
get_figure()


After 150000 Iterations


In [15]:
for _ in range(120000):
    iterate()

In [16]:
get_figure()



In [17]:
error()


Out[17]:
0.023441333408527498

In [22]:
for _ in range(50000):
    iterate()
get_figure()
print(error())


0.0234043421846

In [ ]: